/*
	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	
	SIAB Preparation
	
	Master Do-File
	
	
	Author(s): Wolfgang Dauth, Johann Eppelsheimer
	
	Version: 1.0
	Created: 2018-06-01
	
	
	For details refer to Dauth and Eppelsheimer 2018, Publication...
	http://some.url.de
	
	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/



********************************************************************************
* Project settings: working directory, folder globals, input files and
* observation period
********************************************************************************
* Your working directory (example: N:\...\my_project\stata\).
* The folder globals below hold relative paths, so they are resolved against
* this directory.
cd "N:\...\stata\"

* Folder with your preparation do-files (default: prog\prep)
global prog "prog\prep"

* Folder for your preparation log-files (example: log\prep)
global log "log\prep"

* Folder for your preparation graphs (example: graphs\prep)
global graphs "graphs\prep"

* Your data folder (example: data)
global data "data"

* Path to the modified version of '01_SIAB_bio.do' from FDZ-Methodenreport
* 06/2017 (Johanna Eberle & Alexandra Schmucker). In order to make
* '01_SIAB_bio.do' compatible with our master-file the first and last lines of
* '01_SIAB_bio.do' have been commented out.
global fileEberleSchmucker "prog\prep\Eberle_Schmucker2017\01_SIAB_bio_MODIFIED.do"

* Path to BHP
global fileBHP "N:\Ablagen\D01700-Projekte\D01700-Projekte-FDZ\Datensaetze\_Endprodukte\SIAB\SIAB_7514_v1\SIAB_7514_v1_bhp_basis_v1.dta"

* Observation period
* First year of your observation period (default: 1975)
global minYear 1975
* Last year of your observation period (default: 2014)
global maxYear 2014





********************************************************************************
* Create folders
********************************************************************************
* Create the data, graphs and log folders named in the settings globals.
* The folder names are quoted so that a path containing blanks is handed to the
* operating system's mkdir as one argument instead of being split into several.
shell mkdir "$data"
shell mkdir "$graphs"
shell mkdir "$log"

* List the content of the current working directory
dir


********************************************************************************
* Set up Stata
********************************************************************************
* Do not pause the output with --more--; the option 'permanently' makes Stata
* remember this setting in future sessions as well
set more off, permanently

* Use wide output lines (255 characters)
set linesize 255

* Start from an empty session and close a log-file that may still be open
* ('capture' suppresses the error if no log is open)
clear all
capture log close


********************************************************************************
* Load SIAB and generate variables 'jahr' and age
********************************************************************************
* Read the SIAB data set from the network share
use \\IAB.baintern.de\dfs\017\Ablagen\D01700-Projekte\D01700-Projekte-FDZ\Datensaetze\_Endprodukte\SIAB\SIAB_7514_v1\siab_7514_v1.dta

* jahr (= year): calendar year taken from the date variable begepi
* (presumably the start date of the episode - confirm against the SIAB codebook)
generate jahr = year(begepi)

* age: difference between jahr and gebjahr
* (gebjahr is presumably the year of birth - confirm against the SIAB codebook)
generate age = jahr - gebjahr

* Variable labels
label var jahr "year"
label var age "age (in years)"


********************************************************************************
* 01) Split episodes that span over one year
********************************************************************************
* The path is quoted so that the call also works when the do-file folder
* (global prog) contains blanks
do "${prog}\01_PREP_ieb_split_episodes.do"


********************************************************************************
* Generate biographical variables (tage_erw, tage_bet, ...)
* ... by using a slightly modified version of the do-file '01_SIAB_bio.do' from FDZ-Methodenreport 06/2017 (Johanna Eberle & Alexandra Schmucker)
********************************************************************************
* Close a log-file that may still be open, then write the output of this step
* to its own log-file (an existing log-file of the same name is replaced).
* Both paths are quoted so that folders containing blanks are handled correctly.
capture log close
log using "${log}\02_PREP_ieb_EberleSchmucker.log", replace

* Run the do-file stored in the global fileEberleSchmucker
do "$fileEberleSchmucker"

* Close the log-file of this step
capture log close


********************************************************************************
* Merge BHP data and switch the labels from German to English
********************************************************************************
* All do-file paths in this section are quoted so that the calls also work when
* the do-file folder (global prog) contains blanks
do "${prog}\02_PREP_ieb_merge_BHP.do"


********************************************************************************
* 03a) Map 3-digit industries to 1-digit industries (based on 'Statistisches Bundesamt')
* 03b) Map 3-digit industries to 1-digit industries (based on IAB establishment panel)
********************************************************************************
do "${prog}\03a_PREP_ieb_industries_1digit_destatis.do"
do "${prog}\03b_PREP_ieb_industries_1digit_iab.do"


********************************************************************************
* 04) Add Blossfeld occupations
********************************************************************************
do "${prog}\04_PREP_ieb_occ_blossfeld.do"


********************************************************************************
* 05) Imputation of the education variable based on Fitzenberger, Osikominu & Voelter (2008) (IP 1)
********************************************************************************
* All do-file paths in this section are quoted so that the calls also work when
* the do-file folder (global prog) contains blanks
do "${prog}\05_PREP_ieb_educ_imputation.do"


********************************************************************************
* 06) Add the contribution assessment ceiling
********************************************************************************
do "${prog}\06_PREP_ieb_wages_assessment_ceiling.do"


********************************************************************************
* 07) Add the marginal part-time income threshold and flag marginal wages
********************************************************************************
do "${prog}\07_PREP_ieb_wages_marginal.do"


********************************************************************************
* 08) Deflate wages, marginal part-time income threshold and contribution assessment ceiling
********************************************************************************
do "${prog}\08_PREP_ieb_wages_deflation.do"


********************************************************************************
* Restrict the data set to selected years (in order to speed up the wage imputation!)
********************************************************************************
* Keep the observations whose year lies within the observation period
* (both bounds included); observations with a missing year are dropped
keep if inrange(jahr, $minYear, $maxYear)


********************************************************************************
* 09) Impute wages (2-step procedure, based on Dustmann et al. (2009) and Card et al. (2013))
********************************************************************************
* The paths are quoted so that the calls also work when the do-file folder
* (global prog) or the data folder (global data) contains blanks
do "${prog}\09_PREP_ieb_wages_imputation.do"

* Save intermediate version to disk (an existing file is overwritten)
save "${data}\siab_intermediate.dta", replace


********************************************************************************
* 10) Treat parallel episodes:
* Generate information on parallel episodes
* Keep only 'main' episode
********************************************************************************
* All do-file paths in this section are quoted so that the calls also work when
* the do-file folder (global prog) contains blanks
do "${prog}\10_PREP_ieb_parallel_episodes.do"


********************************************************************************
* 11) Transfer data set into yearly panel
********************************************************************************
do "${prog}\11_PREP_ieb_yearly_panel.do"


********************************************************************************
* 12) Restrict the data to certain groups
********************************************************************************
do "${prog}\12_PREP_ieb_restrictions.do"


********************************************************************************
* Clean up
********************************************************************************
* The paths in this section are quoted so that the calls also work when the
* do-file folder (global prog) or the data folder (global data) contains blanks
do "${prog}\13_PREP_ieb_clean_up.do"

* Optional (disabled): delete the intermediate version of SIAB from disk.
* Remove the leading asterisk of the next line to activate it.
* erase "${data}\siab_intermediate.dta"


********************************************************************************
* Save data
********************************************************************************
* Write the final data set to disk (an existing file is overwritten) and
* empty the session afterwards
save "${data}\siab_clean.dta", replace
clear all
